#!/bin/bash
# Start `vllm serve` for the Qwen3-30B-A3B checkpoint stored under /mnt/share,
# exposing it as model name "qwen3-30" on 0.0.0.0:8000.
#
# Usage: run with no arguments. Requires the `vllm` CLI on PATH.

# Trace every command so the exact launch line shows up in the logs.
set -x

# Raise vLLM's timeouts far above their defaults so long-running requests are
# not aborted. The first value is in seconds per its _S suffix (36000 s = 10 h);
# the RPC timeout is presumably in milliseconds (also 10 h) -- verify against
# the vLLM environment-variable documentation for the installed version.
export VLLM_ENGINE_ITERATION_TIMEOUT_S=36000
export VLLM_RPC_TIMEOUT=36000000
# NOTE(review): intent looks like "do not force CUDA graphs"; confirm that the
# installed vLLM version actually reads this variable.
export VLLM_ENFORCE_CUDA_GRAPH=0

# Maximum context length in tokens (32 * 1024 = 32768). Uses $(( )) rather
# than the deprecated $[ ] arithmetic form.
max_model_len=$((32 * 1024))

# One option per line; an array keeps each argument a separate word without
# relying on backslash continuations.
serve_args=(
  /mnt/share/1/Qwen/Qwen3-30B-A3B
  --served_model_name "qwen3-30"
  -tp 4
  --max_model_len "${max_model_len}"
  --trust_remote_code
  --host 0.0.0.0
  --port 8000
  --enable_chunked_prefill
  --enable_prefix_caching
)
# Optional reasoning flags, currently disabled. To enable, add to serve_args:
#   --enable-reasoning --reasoning-parser qwen3

vllm serve "${serve_args[@]}"